{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "307804a3-c02b-4a57-ac0d-172c30ddc851",
   "metadata": {},
   "source": [
    "# Pinecone Vector Store - Auto Retriever"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "f7010b1d-d1bb-4f08-9309-a328bb4ea396",
   "metadata": {},
   "source": [
    "#### Creating a Pinecone Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "d48af8e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import sys\n",
    "import os\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "4ad14111-0bbb-4c62-906d-6d6253e0cdee",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pinecone\n",
    "\n",
    "api_key = os.environ[\"PINECONE_API_KEY\"]\n",
    "pinecone.init(api_key=api_key, environment=\"eu-west1-gcp\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "c2c90087-bdd9-4ca4-b06b-2af883559f88",
   "metadata": {},
   "outputs": [],
   "source": [
    "# dimensions are for text-embedding-ada-002\n",
    "try:\n",
    "    pinecone.create_index(\n",
    "        \"quickstart-index\", dimension=1536, metric=\"euclidean\", pod_type=\"p1\"\n",
    "    )\n",
    "except Exception:\n",
    "    # most likely index already exists\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "667f3cb3-ce18-48d5-b9aa-bfc1a1f0f0f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "pinecone_index = pinecone.Index(\"quickstart-index\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "8ee4473a-094f-4d0a-a825-e1213db07240",
   "metadata": {},
   "source": [
    "#### Load documents, build the PineconeVectorStore and VectorStoreIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "0a2bcc07",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index import VectorStoreIndex, StorageContext\n",
    "from llama_index.vector_stores import PineconeVectorStore"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "9ae59590",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.schema import TextNode\n",
    "\n",
    "nodes = [\n",
    "    TextNode(\n",
    "        text=\"Michael Jordan is a retired professional basketball player, widely regarded as one of the greatest basketball players of all time.\",\n",
    "        metadata={\n",
    "            \"category\": \"Sports\",\n",
    "            \"country\": \"United States\",\n",
    "        },\n",
    "    ),\n",
    "    TextNode(\n",
    "        text=\"Angelina Jolie is an American actress, filmmaker, and humanitarian. She has received numerous awards for her acting and is known for her philanthropic work.\",\n",
    "        metadata={\n",
    "            \"category\": \"Entertainment\",\n",
    "            \"country\": \"United States\",\n",
    "        },\n",
    "    ),\n",
    "    TextNode(\n",
    "        text=\"Elon Musk is a business magnate, industrial designer, and engineer. He is the founder, CEO, and lead designer of SpaceX, Tesla, Inc., Neuralink, and The Boring Company.\",\n",
    "        metadata={\n",
    "            \"category\": \"Business\",\n",
    "            \"country\": \"United States\",\n",
    "        },\n",
    "    ),\n",
    "    TextNode(\n",
    "        text=\"Rihanna is a Barbadian singer, actress, and businesswoman. She has achieved significant success in the music industry and is known for her versatile musical style.\",\n",
    "        metadata={\n",
    "            \"category\": \"Music\",\n",
    "            \"country\": \"Barbados\",\n",
    "        },\n",
    "    ),\n",
    "    TextNode(\n",
    "        text=\"Cristiano Ronaldo is a Portuguese professional footballer who is considered one of the greatest football players of all time. He has won numerous awards and set multiple records during his career.\",\n",
    "        metadata={\n",
    "            \"category\": \"Sports\",\n",
    "            \"country\": \"Portugal\",\n",
    "        },\n",
    "    ),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "ee6eeecb-d54f-4a71-b5fe-0cda8a5c3e10",
   "metadata": {},
   "outputs": [],
   "source": [
    "vector_store = PineconeVectorStore(pinecone_index=pinecone_index, namespace=\"test\")\n",
    "storage_context = StorageContext.from_defaults(vector_store=vector_store)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "cad08884",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 211 tokens\n",
      "> [build_index_from_nodes] Total embedding token usage: 211 tokens\n",
      "> [build_index_from_nodes] Total embedding token usage: 211 tokens\n"
     ]
    }
   ],
   "source": [
    "index = VectorStoreIndex(nodes, storage_context=storage_context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "1a57e62f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.indices.vector_store.retrievers import VectorIndexAutoRetriever\n",
    "from llama_index.vector_stores.types import MetadataInfo, VectorStoreInfo\n",
    "\n",
    "\n",
    "vector_store_info = VectorStoreInfo(\n",
    "    content_info=\"brief biography of celebrities\",\n",
    "    metadata_info=[\n",
    "        MetadataInfo(\n",
    "            name=\"category\",\n",
    "            type=\"str\",\n",
    "            description=\"Category of the celebrity, one of [Sports, Entertainment, Business, Music]\",\n",
    "        ),\n",
    "        MetadataInfo(\n",
    "            name=\"country\",\n",
    "            type=\"str\",\n",
    "            description=\"Country of the celebrity, one of [United States, Barbados, Portugal]\",\n",
    "        ),\n",
    "    ],\n",
    ")\n",
    "retriever = VectorIndexAutoRetriever(index, vector_store_info=vector_store_info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "a5c0490d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto query: celebrities\n",
      "Auto query: celebrities\n",
      "Auto query: celebrities\n",
      "INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto filter: {'country': 'United States'}\n",
      "Auto filter: {'country': 'United States'}\n",
      "Auto filter: {'country': 'United States'}\n",
      "INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto top_k: 2\n",
      "Auto top_k: 2\n",
      "Auto top_k: 2\n",
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
      "> [retrieve] Total LLM token usage: 0 tokens\n",
      "> [retrieve] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 3 tokens\n",
      "> [retrieve] Total embedding token usage: 3 tokens\n",
      "> [retrieve] Total embedding token usage: 3 tokens\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[NodeWithScore(node=Node(text='category: Entertainment\\ncountry: United States\\n\\nAngelina Jolie is an American actress, filmmaker, and humanitarian. She has received numerous awards for her acting and is known for her philanthropic work.', doc_id='6821b1fe-e1dc-400c-ad2c-83f7fa683321', embedding=None, doc_hash='4086bd15d984c4f3ee3d4f911f0a347735406351d1936b6060b411707d3e82cc', extra_info={'category': 'Entertainment', 'country': 'United States'}, node_info={}, relationships={}), score=0.80265522),\n",
       " NodeWithScore(node=Node(text='category: Sports\\ncountry: United States\\n\\nMichael Jordan is a retired professional basketball player, widely regarded as one of the greatest basketball players of all time.', doc_id='4cf176e5-363f-479b-8979-c3e07cfaead8', embedding=None, doc_hash='9aaec18f659138a23ca519f8d6d1f3997d34aae993b8c07443b165c13163b886', extra_info={'category': 'Sports', 'country': 'United States'}, node_info={}, relationships={}), score=0.766244411)]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "retriever.retrieve(\"Tell me about two celebrities from United States\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "3a1a9287",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto query: Sports celebrities\n",
      "Auto query: Sports celebrities\n",
      "Auto query: Sports celebrities\n",
      "INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto filter: {'category': 'Sports', 'country': 'United States'}\n",
      "Auto filter: {'category': 'Sports', 'country': 'United States'}\n",
      "Auto filter: {'category': 'Sports', 'country': 'United States'}\n",
      "INFO:llama_index.indices.vector_store.auto_retriever.auto_retriever:Auto top_k: 2\n",
      "Auto top_k: 2\n",
      "Auto top_k: 2\n",
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
      "> [retrieve] Total LLM token usage: 0 tokens\n",
      "> [retrieve] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 2 tokens\n",
      "> [retrieve] Total embedding token usage: 2 tokens\n",
      "> [retrieve] Total embedding token usage: 2 tokens\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[NodeWithScore(node=Node(text='category: Sports\\ncountry: United States\\n\\nMichael Jordan is a retired professional basketball player, widely regarded as one of the greatest basketball players of all time.', doc_id='4cf176e5-363f-479b-8979-c3e07cfaead8', embedding=None, doc_hash='9aaec18f659138a23ca519f8d6d1f3997d34aae993b8c07443b165c13163b886', extra_info={'category': 'Sports', 'country': 'United States'}, node_info={}, relationships={}), score=0.797632515)]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "retriever.retrieve(\"Tell me about Sports celebrities from United States\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50d622e3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
